In this report, I will take you through a re-analysis methylation data first described by Choufani et al (2019).
In their study, they analysed the DNA methylation patterns of 44 placenta samples conceived naturally (NAT) and 44 placenta samples conceivced through artificial reproductive technologies (ART) (23 in vitro, 21 in vivo).
The platform used in the study is the Illumina Infinium HumanMethylation450k BeadChip assay.
The methylation data have been deposited to NCBI GEO repository accession number GSE120250.
These packackes will help us to perform vital steps such as normalisation, filtering, differential analysis, etc, and provide information about the array probe annotaions.
suppressPackageStartupMessages({
library("R.utils")
library("missMethyl")
library("limma")
library("topconfects")
library("minfi")
library("IlluminaHumanMethylation450kmanifest")
library("MethylToSNP")
library("RColorBrewer")
library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
library("eulerr")
library("plyr")
library("gplots")
library("reshape2")
library("beeswarm")
library("GEOquery")
library("readxl")
})
# Annotation
ann450k <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
myann <- data.frame(ann450k[,c("UCSC_RefGene_Name","Regulatory_Feature_Group")])
promoters <- grep("Prom",myann$Regulatory_Feature_Group)
These functions provide shortcuts to help with charts and other analysis. They will eventually be shoved into another Rscript or package but can stay here for now.
# scree plot shows the amount of variation in a dataset that is accounted
# for by the first N principal components
myscree <- function(mx,n=10,main="") {
pc <- princomp(mx)$sdev
pcp <- pc/sum(pc)*100
pcp <- pcp[1:10]
barplot(pcp,cex.names = 1,las=2,ylim=c(0,60),
ylab="percent (%) variance explained", main=main)
text((0.5:length(pcp)*1.2),pcp,label=signif(pcp,3),pos=3,cex=0.8)
}
# Here is a function to make a volcano plot
make_volcano <- function(dm,name,mx) {
sig <- subset(dm,adj.P.Val<0.05)
N_SIG=nrow(sig)
N_UP=nrow(subset(sig,logFC>0))
N_DN=nrow(subset(sig,logFC<0))
HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn")
plot(dm$logFC,-log10(dm$P.Val),cex=0.5,pch=19,col="darkgray",
main=name, xlab="log FC", ylab="-log10 pval")
mtext(HEADER)
grid()
points(sig$logFC,-log10(sig$P.Val),cex=0.5,pch=19,col="red")
}
# Here is a function to make heatmaps
make_heatmap <- function(dm,name,mx,n) {
topgenes <- rownames(head(dm[order(dm$P.Value),],n))
ss <- mx[which(rownames(mx) %in% topgenes),]
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
heatmap.2(ss,scale="row",margin=c(10, 10),cexRow=0.4,trace="none",cexCol=0.4,
col=my_palette, main=name)
}
# make beeswarm charts
# dm = a limma differential meth object
# name = character name of the limma dm object
# mx = matrix of normalised data
# groups = a vector of factors corresponding to the cols in mx
# n = the number of top significant genes to plot (default = 15)
make_beeswarms <- function(dm,name,mx,groups,n=15) {
par(mar=c(3,3,1,1))
NCOLS=5
NROWS=floor(n/NCOLS)
if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
par(mfrow=c(NROWS, NCOLS))
topgenes <- rownames(head(dm[order(dm$P.Value),],n))
ss <- mx[which(rownames(mx) %in% topgenes),]
n <- 1:n
g1name=levels(groups)[1]
g2name=levels(groups)[2]
g1dat <- ss[n,which(groups == g1name)]
g2dat <- ss[n,which(groups == g2name)]
g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)
for (i in n) {
mydat <- list(g1l[[i]],g2l[[i]])
beeswarm(mydat,ylim=c(0,1),cex=0.2, pch=19,
las=2, cex.lab=0.6, main=names( g1l )[i] ,
ylab="",labels = c(g1name,g2name))
grid()
}
}
# make beeswarm charts for best confects
# dm = a limma differential meth object
# name = character name of the limma dm object
# mx = matrix of normalised data
# groups = a vector of factors corresponding to the cols in mx
# n = the number of top significant genes to plot (default = 15)
make_beeswarms_confects <- function(confects,name,mx,groups,n=15) {
par(mar=c(3,3,1,1))
NCOLS=5
NROWS=floor(n/NCOLS)
if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
par(mfrow=c(NROWS, NCOLS))
topgenes <- head(confects$table,n)$name
ss <- mx[which(rownames(mx) %in% topgenes),]
n <- 1:n
g1name=levels(groups)[1]
g2name=levels(groups)[2]
g1dat <- ss[n,which(groups == g1name)]
g2dat <- ss[n,which(groups == g2name)]
g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)
for (i in n) {
mydat <- list(g1l[[i]],g2l[[i]])
beeswarm(mydat,ylim=c(0,1),cex=0.2, pch=19,
las=2, cex.lab=0.6, main=names( g1l )[i] ,
ylab="",labels = c(g1name,g2name))
grid()
}
}
# this is a wrapper which creates three charts
# We will be adding more
make_dm_plots <- function(dm,name,mx,groups=groups,confects=confects) {
make_volcano(dm,name,mx)
make_beeswarms(dm ,name , mx , groups , n= 15)
make_heatmap(dm , name , mx ,n = 50)
make_beeswarms_confects(confects, name, mx, groups, n=15)
}
# this is a function which will perform differential methylation analysis
# if you provide it with the right inputs
dm_analysis <- function(samplesheet,sex,groups,mx,name,myann,beta) {
design <- model.matrix(~ sex + groups)
mxs <- mx[,which( colnames(mx) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
dm <- topTable(fit.reduced,coef=3, number = Inf)
dma <- merge(myann,dm,by=0)
dma <- dma[order(dma$P.Value),]
dm_up <- rownames(subset(dm,adj.P.Val<0.05 & logFC>0))
dm_dn <- rownames(subset(dm,adj.P.Val<0.05 & logFC<0))
length(dm_up)
length(dm_dn)
confects <- limma_confects(fit.reduced, coef=3, fdr=0.05)
make_dm_plots(dm = dm ,name=name , mx=beta, groups= groups, confects=confects)
dat <- list("dma"=dma, "dm_up"=dm_up, "dm_dn"=dm_dn, "confects"=confects)
}
Importing Data from GEO. Raw IDAT files and accompanying Series Matrix publicly available on GEO.
#Create Dirtectory
dir.create("GSE120250")
## Warning in dir.create("GSE120250"): 'GSE120250' already exists
ARRAY_SAMPLESHEET="GSE120250/GSE120250_series_matrix.txt.gz"
# only download it if it is not present on the system
if ( !file.exists(ARRAY_SAMPLESHEET ) ) {
DLFILE=paste(ARRAY_SAMPLESHEET,".gz",sep="")
download.file("https://ftp.ncbi.nlm.nih.gov/geo/series/GSE120nnn/GSE120250/matrix/GSE120250_series_matrix.txt.gz",
destfile = DLFILE)
gunzip(DLFILE)
}
# Reading in GEO series matrix
gse <- getGEO(filename="/mnt/mnorris/ART_methylation/GSE120250/GSE120250_series_matrix.txt.gz")
## Parsed with column specification:
## cols(
## .default = col_double(),
## ID_REF = col_character()
## )
## See spec(...) for full column specifications.
## File stored at:
## /tmp/RtmpI4KgiP/GPL13534.soft
## Warning: 65 parsing failures.
## row col expected actual file
## 485513 SPOT_ID 1/0/T/F/TRUE/FALSE rs10796216 literal data
## 485514 SPOT_ID 1/0/T/F/TRUE/FALSE rs715359 literal data
## 485515 SPOT_ID 1/0/T/F/TRUE/FALSE rs1040870 literal data
## 485516 SPOT_ID 1/0/T/F/TRUE/FALSE rs10936224 literal data
## 485517 SPOT_ID 1/0/T/F/TRUE/FALSE rs213028 literal data
## ...... ....... .................. .......... ............
## See problems(...) for more details.
ARRAY_DATA="GSE120250/GSE120250_RAW.tar"
# only download it if it is not present on the system
if ( !dir.exists("GSE120250/IDAT") ) {
dir.create("GSE120250/IDAT")
download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE120250&format=file",
destfile = ARRAY_DATA)
untar(exdir = "GSE120250/IDAT", tarfile = "GSE120250/GSE120250_RAW.tar",)
}
baseDir <- "./GSE120250"
targets <- pData(phenoData(gse))
mybase <- unique(gsub("_Red.idat.gz" ,"", gsub("_Grn.idat.gz", "" ,list.files("./GSE120250",pattern = "GSM",recursive = TRUE))))
targets$Basename <- paste("GSE120250/", mybase, sep = "")
head(targets$Basename)
## [1] "GSE120250/IDAT/GSM3396785_9992576119_R02C01"
## [2] "GSE120250/IDAT/GSM3396786_9992576022_R02C02"
## [3] "GSE120250/IDAT/GSM3396787_9992576033_R01C02"
## [4] "GSE120250/IDAT/GSM3396788_9992576022_R04C02"
## [5] "GSE120250/IDAT/GSM3396789_9992576207_R03C02"
## [6] "GSE120250/IDAT/GSM3396790_9992576022_R05C02"
rgSet <- read.metharray.exp(targets = targets)
mSet <- preprocessRaw(rgSet)
mSetSw <- SWAN(mSet,verbose=FALSE)
## [SWAN] Preparing normalization subset
## 450k
## [SWAN] Normalizing methylated channel
## [SWAN] Normalizing unmethylated channel
par(mfrow=c(1,2))
densityByProbeType(mSet[,1], main = "Raw")
densityByProbeType(mSetSw[,1], main = "SWAN")
Figure 1. Normalisation of bead-array data with SWAN.
Here we are running parallel analyses, both including and excluding sex chromosomes.
# include sex chromosomes
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]
# exclude SNP probes
#mSetSw <- mapToGenome(mSetSw)
#mSetSw_nosnp <- dropLociWithSnps(mSetSw)
#dim(mSetSw)
#dim(mSetSw_nosnp)
#mSetSw <- mSetSw_nosnp
# exclude sex chromosomes
keep <- !(featureNames(mSetSw) %in% ann450k$Name[ann450k$chr %in% c("chrX","chrY")])
mSetFlt <- mSetSw[keep,]
head(mSetFlt)
## class: MethylSet
## dim: 6 88
## metadata(0):
## assays(2): Meth Unmeth
## rownames(6): cg00000957 cg00001349 ... cg00002719 cg00002837
## rowData names(0):
## colnames(88): GSM3396785_9992576119_R02C01 GSM3396786_9992576022_R02C02
## ... GSM3396871_9992576119_R05C01 GSM3396872_9992576207_R02C02
## colData names(39): title geo_accession ... Basename filenames
## Annotation
## array: IlluminaHumanMethylation450k
## annotation: ilmn12.hg19
## Preprocessing
## Method: SWAN (based on a MethylSet preprocessed as 'Raw (no normalization or bg correction)')
## minfi version: 1.32.0
## Manifest version: 0.4.0
dim(mSetFlt)
## [1] 468596 88
# include sex chromosomes
meth <- getMeth(mSetSw)
unmeth <- getUnmeth(mSetSw)
Mval <- log2((meth + 100)/(unmeth + 100))
beta <- getBeta(mSetSw)
# exclude sex chromosomes
meth <- getMeth(mSetFlt)
unmeth <- getUnmeth(mSetFlt)
Mval_flt <- log2((meth + 100)/(unmeth + 100))
beta_flt <- getBeta(mSetFlt)
DOI: https://doi.org/10.1186/s13072-019-0321-6
#testing
#test <- MethylToSNP(beta, gap.ratio = 0.5, gap.sum.ratio = 0.4, SNP=SNPs.147CommonSingle #,verbose=FALSE)
#nrow(test)
# include sex chromosomes
# evidence of SNP
#snp_probe_dat <- MethylToSNP(beta, gap.ratio = 0.5, gap.sum.ratio = 0.4,
# SNP=SNPs.147CommonSingle ,verbose=FALSE)
#dim(snp_probe_dat)
#head(snp_probe_dat)
#snp_probes <- rownames(snp_probe_dat)
# >50% confidence of SNP
#nrow(snp_probe_dat[which(snp_probe_dat$confidence>0.5),])
# remove SNP probes
#Mval <- Mval[which(!rownames(Mval) %in% snp_probes),]
#dim(Mval)
# evidence of SNP
#snp_probe_dat <- MethylToSNP(beta_flt, SNP=SNPs.147CommonSingle ,verbose=FALSE)
#dim(snp_probe_dat)
#head(snp_probe_dat)
#snp_probes <- rownames(snp_probe_dat)
# >50% confidence of SNP
#nrow(snp_probe_dat[which(snp_probe_dat$confidence>0.5),])
# remove SNP probes
#Mval_flt <- Mval_flt[which(!rownames(Mval_flt) %in% snp_probes),]
#dim(Mval_flt)
[Multidimensional scaling(https://en.wikipedia.org/wiki/Multidimensional_scaling) plot is a method used to identify the major sources of variation in a dataset. In the MDS plots below, I will be plotting the first two dimensions (principal components [PCs]), with each sample label coloured either by ART classification, sex, ART and sex, and then array chip and then sample plate.
We wil begin with MDS analysis including the sex chromosomes and then exclude them.
First, let’s quantify the contribution of the major principal components. with a scree plot, we can see whether most of the variation is captured in the first two PCs or whether it is spread over more PCs.
par(mfrow=c(2,1))
myscree(Mval,main="incl sex chr")
myscree(Mval_flt,main="excl sex chr")
Figure 2. Screeplot shows contribution of top principal components to the overall variation in the experiment.
sample_groups <- factor(targets$characteristics_ch1.2)
head(sample_groups)
## V2 V3 V4
## art treatment: in vitro art treatment: in vivo art treatment: in vitro
## V5 V6 V7
## art treatment: in vivo art treatment: in vitro art treatment: in vitro
## Levels: art treatment: in vitro art treatment: in vivo art treatment: NA
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
colours <- colour_palette[as.integer(factor(targets$art))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by ART type")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 3. MDS plot coloured by ART classification.
mydist <- plotMDS(Mval, labels=rownames(targets),col=colours,main="sex chromosomes included")
Figure 3. MDS plot coloured by ART classification.
mydist_flt <- plotMDS(Mval_flt, labels=rownames(targets),col=colours,main="sex chromosomes excluded")
Figure 3. MDS plot coloured by ART classification.
sample_groups <- factor(targets$characteristics_ch1.1)
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
## Warning in brewer.pal(n = length(levels(sample_groups)), name = "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
colours <- colour_palette[as.integer(factor(targets$characteristics_ch1.1))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by sex")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 4. MDS plot coloured by sex.
plotMDS(mydist, labels=rownames(targets),col=colours,main="sex chromosomes included")
Figure 4. MDS plot coloured by sex.
plotMDS(mydist_flt, labels=rownames(targets) ,col=colours,main="sex chromosomes excluded")
Figure 4. MDS plot coloured by sex.
targets$ARTgender <- paste(targets$`art treatment:ch1`,targets$`gender:ch1`, sep=("_"))
targets$ARTgender
## [1] "in vitro_M" "in vivo_M" "in vitro_M" "in vivo_F" "in vitro_M"
## [6] "in vitro_F" "in vitro_F" "in vivo_F" "in vivo_F" "in vitro_F"
## [11] "in vitro_F" "in vitro_F" "in vivo_F" "in vitro_F" "in vivo_M"
## [16] "in vitro_F" "in vitro_M" "in vitro_M" "in vivo_M" "in vitro_M"
## [21] "in vitro_M" "in vitro_M" "in vivo_F" "in vitro_F" "in vivo_F"
## [26] "in vivo_F" "in vivo_M" "in vitro_M" "in vitro_M" "in vivo_M"
## [31] "in vitro_F" "in vitro_F" "in vivo_M" "in vivo_M" "in vitro_M"
## [36] "in vivo_M" "in vivo_F" "in vivo_M" "in vitro_F" "in vitro_M"
## [41] "in vivo_M" "in vivo_F" "in vivo_M" "in vivo_M" "NA_F"
## [46] "NA_F" "NA_M" "NA_F" "NA_M" "NA_F"
## [51] "NA_F" "NA_M" "NA_F" "NA_M" "NA_M"
## [56] "NA_M" "NA_M" "NA_F" "NA_F" "NA_M"
## [61] "NA_F" "NA_F" "NA_M" "NA_M" "NA_F"
## [66] "NA_F" "NA_M" "NA_F" "NA_M" "NA_M"
## [71] "NA_F" "NA_M" "NA_M" "NA_M" "NA_M"
## [76] "NA_F" "NA_F" "NA_F" "NA_M" "NA_M"
## [81] "NA_F" "NA_M" "NA_F" "NA_F" "NA_M"
## [86] "NA_F" "NA_M" "NA_M"
sample_groups <- factor(targets$ARTgender)
sample_groups
## [1] in vitro_M in vivo_M in vitro_M in vivo_F in vitro_M in vitro_F
## [7] in vitro_F in vivo_F in vivo_F in vitro_F in vitro_F in vitro_F
## [13] in vivo_F in vitro_F in vivo_M in vitro_F in vitro_M in vitro_M
## [19] in vivo_M in vitro_M in vitro_M in vitro_M in vivo_F in vitro_F
## [25] in vivo_F in vivo_F in vivo_M in vitro_M in vitro_M in vivo_M
## [31] in vitro_F in vitro_F in vivo_M in vivo_M in vitro_M in vivo_M
## [37] in vivo_F in vivo_M in vitro_F in vitro_M in vivo_M in vivo_F
## [43] in vivo_M in vivo_M NA_F NA_F NA_M NA_F
## [49] NA_M NA_F NA_F NA_M NA_F NA_M
## [55] NA_M NA_M NA_M NA_F NA_F NA_M
## [61] NA_F NA_F NA_M NA_M NA_F NA_F
## [67] NA_M NA_F NA_M NA_M NA_F NA_M
## [73] NA_M NA_M NA_M NA_F NA_F NA_F
## [79] NA_M NA_M NA_F NA_M NA_F NA_F
## [85] NA_M NA_F NA_M NA_M
## Levels: in vitro_F in vitro_M in vivo_F in vivo_M NA_F NA_M
colour_palette=brewer.pal(n = length(levels(sample_groups)), name = "Paired")
colours <- colour_palette[as.integer(factor(targets$ARTgender))]
plot(1,axes = FALSE,xlab="",ylab="",main="MDS by ART and sex")
legend("center",legend=levels(sample_groups),pch=16,cex=1.2,col=colour_palette)
Figure 5. MDS plot coloured by ART classification and sex.
plotMDS(mydist, labels=rownames(targets), col=colours,main="sex chromosomes included")
Figure 5. MDS plot coloured by ART classification and sex.
plotMDS(mydist_flt, labels=rownames(targets), col=colours,main="sex chromosomes excluded")
Figure 5. MDS plot coloured by ART classification and sex.
There are a few differential contrasts that would be of interest to us in this study:
NAT vs ART:
NAT vs in vitro: .
NAT vs in vivo: .
in vivo vs in vitro:
The differential analysis is centred around limma to identify differentially methylated probes. TopConfects was also run to obtain the probes with the largest confident effect (topconfect). There are five outputs below:
Volcano plot (limma result).
Beeswarm plot (top probes by limma p-value).
Heatmap (top probes by limma p-value).
Beeswarm plot (top probes by topconfect ranking).
Heatmap (top probes by topconfect ranking). (TODO)
# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vivo")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vivo"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vivo
## Down 210046 5869 0
## NotSig 21546 469494 479729
## Up 248137 4366 0
top_NA_vs_invivo_inc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval,name="top_NA_vs_invivo_inc",
myann=myann ,beta= beta)
Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes
Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes
Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes
Figure 6: Differential analysis: Naturally conceived vs in vivo including sex chromosomes
head(top_NA_vs_invivo_inc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 37792 cg01866330 SLC45A4 -0.4229012
## 238262 cg13045913 DNAJC17;ZFYVE19 Promoter_Associated 0.5620240
## 313464 cg17328716 LOC650226 0.5320257
## 189567 cg10071113 TSSC1 -0.2969455
## 151933 cg07909498 -1.2108576
## 41161 cg02030958 -0.3464413
## AveExpr t P.Value adj.P.Val B
## 37792 3.553625 -4.859240 7.570646e-06 0.5252591 0.5177210257
## 238262 -2.541986 4.842400 8.059513e-06 0.5252591 0.4869350987
## 313464 -1.554288 4.781452 1.010040e-05 0.5252591 0.3757144407
## 189567 3.425431 -4.589701 2.038524e-05 0.5252591 0.0280579483
## 151933 -1.831452 -4.574248 2.156042e-05 0.5252591 0.0002065045
## 41161 4.131721 -4.572559 2.169281e-05 0.5252591 -0.0028362871
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vivo")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vivo"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval_flt[,which( colnames(Mval) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vivo
## Down 204127 875 0
## NotSig 21114 467621 468596
## Up 243355 100 0
top_NA_vs_invivo_exc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval_flt,name="top_NA_vs_invivo_exc",
myann=myann ,beta= beta)
Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes
Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes
Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes
Figure 7: Differential analysis: Naturally conceived vs in vivo excluding sex chromosomes
head(top_NA_vs_invivo_exc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 36833 cg01866330 SLC45A4 -0.4229012
## 232680 cg13045913 DNAJC17;ZFYVE19 Promoter_Associated 0.5620240
## 306258 cg17328716 LOC650226 0.5320257
## 185055 cg10071113 TSSC1 -0.2969455
## 40110 cg02030958 -0.3464413
## 148234 cg07909498 -1.2108576
## AveExpr t P.Value adj.P.Val B
## 36833 3.553625 -4.860724 7.531265e-06 0.5208319 0.54350196
## 232680 -2.541986 4.843020 8.043362e-06 0.5208319 0.51097288
## 306258 -1.554288 4.782161 1.007688e-05 0.5208319 0.39934829
## 185055 3.425431 -4.592608 2.017638e-05 0.5208319 0.05391657
## 40110 4.131721 -4.574547 2.154247e-05 0.5208319 0.02119920
## 148234 -1.831452 -4.573994 2.158568e-05 0.5208319 0.02019829
# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vitro
## Down 209654 4936 0
## NotSig 20466 470575 479724
## Up 249609 4218 5
top_NA_vs_invitro_inc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval,name="top_NA_vs_invitro_inc",
myann=myann ,beta= beta)
Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes
Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes
Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes
Figure 8: Differential analysis: Naturally conceived vs in vitro including sex chromosomes
head(top_NA_vs_invitro_inc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 183102 cg09685182 1.5353535
## 383766 cg21858485 1.4074114
## 45648 cg02264082 0.9171541
## 309783 cg17104425 PODN Unclassified_Cell_type_specific 0.5873643
## 72953 cg03651292 0.7774220
## 82153 cg04136610 ADAMTS16 0.9467504
## AveExpr t P.Value adj.P.Val B
## 183102 -2.3053388 7.008182 1.366032e-09 0.0006553252 8.304303
## 383766 -2.0789440 6.468313 1.265799e-08 0.0030362021 6.801015
## 45648 -1.8380561 5.930313 1.122454e-07 0.0179491197 5.303081
## 309783 -2.3314637 5.643580 3.518952e-07 0.0378088757 4.510037
## 72953 -0.6006281 5.614945 3.940649e-07 0.0378088757 4.431179
## 82153 -2.4370975 5.443791 7.721237e-07 0.0604027464 3.961462
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="NA" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("NA","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vitro
## Down 203748 531 0
## NotSig 20035 467977 468591
## Up 244813 88 5
top_NA_vs_invitro_exc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval_flt,name="top_NA_vs_invitro_exc",
myann=myann ,beta= beta)
Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes
Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes
Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes
Figure 9: Differential analysis: Naturally conceived vs in vitro excluding sex chromosomes
head(top_NA_vs_invitro_exc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 178741 cg09685182 1.5353535
## 374878 cg21858485 1.4074114
## 44485 cg02264082 0.9171541
## 302653 cg17104425 PODN Unclassified_Cell_type_specific 0.5873643
## 71190 cg03651292 0.7774220
## 80135 cg04136610 ADAMTS16 0.9467504
## AveExpr t P.Value adj.P.Val B
## 178741 -2.3053388 7.008114 1.367185e-09 0.0006406572 8.345059
## 374878 -2.0789440 6.468255 1.266667e-08 0.0029677754 6.835851
## 44485 -1.8380561 5.930576 1.121660e-07 0.0175201782 5.333133
## 302653 -2.3314637 5.644565 3.506316e-07 0.0368842100 4.539169
## 71190 -0.6006281 5.615344 3.935609e-07 0.0368842100 4.458403
## 80135 -2.4370975 5.443910 7.719732e-07 0.0584033459 3.986219
# Include Sex Chromosomes
samplesheet <- targets
samplesheet$title <- as.character(samplesheet$title)
samplesheet$title <- (sapply(strsplit(samplesheet$title, " "), "[[", 1))
samplesheet$title <- (sapply(strsplit(samplesheet$title, "_"), "[[", 1))
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$title,levels=c("ART","Control"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsControl
## Down 208613 8714 0
## NotSig 19308 466242 479729
## Up 251808 4773 0
top_ART_vs_Control_inc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval,name="top_ART_vs_Natural_inc",
myann=myann ,beta= beta)
Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes
Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes
Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes
Figure 10: Differential analysis: Naturally conceived vs ART including sex chromosomes
head(top_ART_vs_Control_inc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 437740 cg25296923 0.3538648
## 473617 cg27477884 -0.3533839
## 37792 cg01866330 SLC45A4 0.3440090
## 262670 cg14305701 INPP5A 0.7627745
## 88939 cg04490037 DDC 0.2033697
## 279210 cg15240419 0.1825872
## AveExpr t P.Value adj.P.Val B
## 437740 2.1818350 5.027381 2.549229e-06 0.4468383 3.142730
## 473617 0.5237203 -4.878135 4.650727e-06 0.4468383 2.713563
## 37792 3.5178538 4.861294 4.974325e-06 0.4468383 2.665510
## 262670 3.2650169 4.845725 5.292930e-06 0.4468383 2.621155
## 88939 0.3936439 4.830849 5.615854e-06 0.4468383 2.578837
## 279210 3.8673110 4.722155 8.631677e-06 0.4468383 2.271538
# Exclude Sex Chromosomes
samplesheet <- targets
samplesheet$title <- as.character(samplesheet$title)
samplesheet$title <- (sapply(strsplit(samplesheet$title, " "), "[[", 1))
samplesheet$title <- (sapply(strsplit(samplesheet$title, "_"), "[[", 1))
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$title,levels=c("ART","Control"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsControl
## Down 202723 1990 0
## NotSig 18901 466407 468596
## Up 246972 199 0
top_ART_vs_Control_exc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval_flt,name="top_ART_vs_Natural_exc",
myann=myann ,beta= beta)
Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes
Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes
Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes
Figure 11: Differential analysis: Naturally conceived vs ART excluding sex chromosomes
head(top_ART_vs_Control_exc$dma)
## Row.names UCSC_RefGene_Name Regulatory_Feature_Group logFC
## 427640 cg25296923 0.3538648
## 462648 cg27477884 -0.3533839
## 36833 cg01866330 SLC45A4 0.3440090
## 256565 cg14305701 INPP5A 0.7627745
## 86764 cg04490037 DDC 0.2033697
## 272770 cg15240419 0.1825872
## AveExpr t P.Value adj.P.Val B
## 427640 2.1818350 5.028467 2.538491e-06 0.440713 3.183405
## 462648 0.5237203 -4.879111 4.633396e-06 0.440713 2.751332
## 36833 3.5178538 4.862330 4.954640e-06 0.440713 2.703161
## 256565 3.2650169 4.845670 5.294983e-06 0.440713 2.655414
## 86764 0.3936439 4.834363 5.538812e-06 0.440713 2.623055
## 272770 3.8673110 4.726287 8.493875e-06 0.440713 2.315591
# Include Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="in vivo" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("in vivo","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval[,which( colnames(Mval) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vitro
## Down 205664 35877 10
## NotSig 26700 430659 479624
## Up 247365 13193 95
top_invivo_vs_invitro_inc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval,name="top_invivo_vs_invitro_inc",
myann=myann ,beta= beta)
Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes
Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes
Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes
Figure 12: Differential analysis: in vitro vs in vivo including sex chromosomes
head(top_invivo_vs_invitro_inc$dma)
## Row.names UCSC_RefGene_Name
## 72841 cg03646096
## 28936 cg01408508
## 446460 cg25860795 FLI1;FLI1
## 49140 cg02441747 COL25A1;COL25A1;COL25A1;COL25A1
## 276765 cg15084543 ELTD1;ELTD1
## 115628 cg05937969 CASR
## Regulatory_Feature_Group logFC AveExpr t
## 72841 1.2416083 2.84813440 7.202227
## 28936 Unclassified 1.0039389 -0.02762027 6.368744
## 446460 Promoter_Associated 1.3978054 -1.92479009 6.201598
## 49140 Unclassified_Cell_type_specific 0.9461161 -1.83889964 6.140996
## 276765 1.4338091 -1.40566774 6.008296
## 115628 0.9947499 -1.91755002 5.937261
## P.Value adj.P.Val B
## 72841 4.931350e-09 0.002365712 9.141197
## 28936 8.605989e-08 0.020642712 6.867852
## 446460 1.528173e-07 0.022566761 6.405877
## 49140 1.881626e-07 0.022566761 6.238068
## 276765 2.966493e-07 0.027618011 5.870164
## 115628 3.784140e-07 0.027618011 5.673028
# Exclude Sex Chromosomes
samplesheet <- subset(targets,`art treatment:ch1`=="in vivo" | `art treatment:ch1`=="in vitro")
samplesheet$Basename <- sapply(strsplit(samplesheet$Basename, "/"), "[[", 3)
groups <- factor(samplesheet$`art treatment:ch1`,levels=c("in vivo","in vitro"))
sex <- factor(samplesheet$`gender:ch1`,levels=c("M","F"))
design <- model.matrix(~ sex + groups)
mxs <- Mval_flt[,which( colnames(Mval_flt) %in% samplesheet$Basename )]
fit.reduced <- lmFit(mxs,design)
fit.reduced <- eBayes(fit.reduced)
summary(decideTests(fit.reduced))
## (Intercept) sexF groupsin vitro
## Down 199815 25742 10
## NotSig 26162 436539 468494
## Up 242619 6315 92
top_invivo_vs_invitro_exc <- dm_analysis(samplesheet=samplesheet,
sex=sex,groups=groups,mx=Mval_flt,name="top_invivo_vs_invitro_exc",
myann=myann ,beta= beta)
Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes
Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes
Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes
Figure 13: Differential analysis: in vitro vs in vivo excluding sex chromosomes
head(top_invivo_vs_invitro_exc$dma)
## Row.names UCSC_RefGene_Name
## 71081 cg03646096
## 28188 cg01408508
## 436140 cg25860795 FLI1;FLI1
## 47905 cg02441747 COL25A1;COL25A1;COL25A1;COL25A1
## 270368 cg15084543 ELTD1;ELTD1
## 112763 cg05937969 CASR
## Regulatory_Feature_Group logFC AveExpr t
## 71081 1.2416083 2.84813440 7.202424
## 28188 Unclassified 1.0039389 -0.02762027 6.369140
## 436140 Promoter_Associated 1.3978054 -1.92479009 6.201308
## 47905 Unclassified_Cell_type_specific 0.9461161 -1.83889964 6.141440
## 270368 1.4338091 -1.40566774 6.007949
## 112763 0.9947499 -1.91755002 5.937485
## P.Value adj.P.Val B
## 71081 4.936159e-09 0.002313064 9.133532
## 28188 8.604697e-08 0.020160634 6.862791
## 436140 1.531422e-07 0.022033799 6.399312
## 47905 1.880835e-07 0.022033799 6.233669
## 270368 2.973122e-07 0.027863858 5.863874
## 112763 3.785047e-07 0.029560965 5.668483
First, we look at the similarity of DMPs altered between in vivo and in vitro.
#v1 <- list("in vivo up" = top_NA_vs_invivo_exc$dm_up ,
# "in vitro up" = top_NA_vs_invitro_exc$dm_up ,
# "in vivo dn" = top_NA_vs_invivo_exc$dm_dn ,
# "in vitro dn" = top_NA_vs_invitro_exc$dm_dn )
#plot(euler(v1, shape = "ellipse"), quantities = TRUE)